In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import brewer2mpl


from datetime import datetime
from Bio import AlignIO, SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Align.Applications import ClustalOmegaCommandline
from __future__ import division

%matplotlib inline

Background

The FASTA files come from alignments, so the sequences originally contained gap characters; I have already removed those from the FASTA files with find-and-replace. The files also contain "partial CDS" sequences.

Tasks:

  1. Remove partial CDS sequences
  2. Find the longest ORF in each sequence and write them to one FASTA file per gene.
  3. Do multiple sequence alignment.

In [2]:
# Keep only the records whose FASTA description contains 'complete cds'
# (partial CDS records are dropped), for both the HA and the NA file.
ha_sequences = []
for record in SeqIO.parse('H9N2_HA.fasta', 'fasta'):
    if 'complete cds' in record.description:
        ha_sequences.append(record)
print(len(ha_sequences))

na_sequences = []
for record in SeqIO.parse('H9N2_NA.fasta', 'fasta'):
    if 'complete cds' in record.description:
        na_sequences.append(record)
print(len(na_sequences))


128
98

In [3]:
# Function to find longest ORFs:
def longest_orfs(list_of_seqrecords):
    """Return, for every nucleotide record, its longest stop-free translation.

    Each record is translated in the three forward reading frames.  Every
    translation is split at stop codons ("*") and the longest resulting
    fragment across all three frames is kept (the first one found wins ties).
    No start codon is required and the reverse strand is not examined.

    Parameters
    ----------
    list_of_seqrecords : iterable of SeqRecord
        Nucleotide records to translate.

    Returns
    -------
    list of SeqRecord
        One record per input, carrying the input's ``id`` and the selected
        protein fragment as ``seq`` (an empty string if nothing was longer
        than zero residues).
    """
    proteins = []
    for record in list_of_seqrecords:
        best = SeqRecord(id=record.id, seq='')
        for offset in (0, 1, 2):
            # Trim the frame to a whole number of codons before translating.
            usable = ((len(record) - offset) // 3) * 3
            translated = record.seq[offset:offset + usable].translate()
            for fragment in translated.split("*"):
                if len(best.seq) < len(fragment):
                    best.seq = fragment
        proteins.append(best)

    return proteins

In [4]:
# Translate every sequence to its longest ORF, then write one FASTA file per
# gene to disk (NA first, HA second).
na_orfs = longest_orfs(na_sequences)
ha_orfs = longest_orfs(ha_sequences)

SeqIO.write(na_orfs, 'H9N2_NA_CDS.fasta', 'fasta')
# Last expression of the cell: the notebook displays the value returned by
# this HA write.
SeqIO.write(ha_orfs, 'H9N2_HA_CDS.fasta', 'fasta')


Out[4]:
128

In [5]:
# # Perform multiple sequence alignment using Clustal Omega.
# # Uncomment this cell only if you need to run another multiple sequence alignment.

# genes = ["HA", 'NA']

# for gene in genes:
#     in_file = "H9N2_%s_CDS.fasta" % gene
#     out_file = "H9N2_%s_CDS_Aligned.fasta" % gene
#     cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True, force=True)
#     cline()

In [6]:
# Read in the aligned sequences

gene = "HA"

# The aligned FASTA file holds a single alignment; read it once and derive
# both the residue table and the host labels from the same records so the two
# can never get out of step.
alignment = AlignIO.read('H9N2_%s_CDS_Aligned.fasta' % gene, 'fasta')

# One row per sequence, one column per (0-based) alignment position.
df = pd.DataFrame(np.array([list(str(record.seq)) for record in alignment]))

# Capture wild or domestic status for every sequence, in row order.
# The third "_"-separated field of the record id carries the host code.
host_names = {'D': 'domestic', 'W': 'wild'}
labels = []
for record in alignment:
    host_code = record.id.split("_")[2]
    if host_code not in host_names:
        # Silently skipping the record would shift every following label
        # onto the wrong row of df, so fail loudly instead.
        raise ValueError("Unrecognised host code %r in record id %r"
                         % (host_code, record.id))
    labels.append(host_names[host_code])

# Remove any positions that do not contain any variation.  The surviving
# columns keep their original position numbers as labels.
variable_positions = [column for column in df.columns
                      if len(np.unique(df[column].values)) != 1]
df = df[variable_positions]

In [7]:
from sklearn.preprocessing import LabelEncoder
from random import shuffle  # not needed by this cell any more; import kept in case other cells rely on the name

# Encode every alignment symbol as an integer category.  The alphabet below
# has 24 symbols (the 20 standard amino-acid letters plus B, J, X and the gap
# character "-"), so the codes run from 0 to 23.  LabelEncoder sorts its
# classes, so the mapping does not depend on the order of this list.
le = LabelEncoder()
amino_acid_code = list('ABCDEFGHIJKLMNPQRSTVWXY-')
print(len(amino_acid_code))
le.fit(amino_acid_code)
a_encoded = le.transform(amino_acid_code)

# Symbol -> integer code lookup.
encode_dict = {letter: int(code) for letter, code in zip(amino_acid_code, a_encoded)}

# A symbol missing from the alphabet would be left in place as a string and
# only surface later as an obscure error when fitting the classifier.
unknown_symbols = set(np.unique(df.values)) - set(encode_dict)
if unknown_symbols:
    raise ValueError("Alignment contains symbols missing from the encoding "
                     "alphabet: %s" % sorted(unknown_symbols))

# Replace amino acids with integer numbers.  Passing the mapping itself avoids
# depending on the iteration order of separate keys()/values() views.
encoded_df = df.replace(encode_dict).astype(int)
encoded_df


24
Out[7]:
1 2 3 4 6 7 8 11 12 14 ... 550 551 552 553 554 555 556 557 558 559
0 5 19 9 18 9 19 9 12 20 19 ... 14 7 18 3 17 3 14 9 3 9
1 5 19 9 18 9 19 9 9 18 1 ... 14 7 18 3 17 3 14 9 3 9
2 5 19 9 18 9 19 9 9 18 1 ... 14 7 18 3 17 3 14 9 3 9
3 5 19 9 18 9 19 9 9 18 1 ... 14 7 18 3 17 3 14 9 3 9
4 5 19 9 18 9 19 9 9 18 1 ... 14 7 18 3 17 3 14 9 3 9
5 5 19 9 18 9 19 9 9 18 1 ... 14 7 18 3 17 3 14 9 3 9
6 5 19 9 18 9 19 9 9 18 1 ... 14 7 18 3 17 3 14 9 3 9
7 5 19 9 18 9 19 9 9 18 1 ... 14 7 18 3 17 3 14 9 3 9
8 5 19 9 18 9 19 9 9 18 1 ... 14 7 18 3 17 3 14 9 3 9
9 5 19 9 18 9 19 9 9 18 1 ... 14 7 18 3 17 3 14 9 3 9
10 5 19 9 18 9 19 9 9 18 1 ... 14 7 18 3 17 3 14 9 3 9
11 5 19 20 18 13 19 9 20 1 1 ... 14 7 18 3 17 3 14 9 3 9
12 5 19 20 18 13 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
13 5 19 20 18 13 19 9 20 1 1 ... 14 7 18 3 17 3 19 9 3 9
14 5 19 20 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
15 5 19 20 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
16 5 16 20 18 9 19 9 20 20 1 ... 14 7 6 3 17 3 14 9 3 9
17 5 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
18 5 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
19 5 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
20 5 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
21 5 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
22 5 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
23 5 19 20 18 13 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
24 5 19 20 18 13 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
25 5 19 20 18 13 19 9 20 20 1 ... 14 7 19 3 17 3 14 9 3 9
26 5 19 20 18 13 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
27 5 19 9 18 13 1 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
28 5 19 9 18 13 1 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
29 5 19 9 18 13 1 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
98 5 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
99 5 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
100 5 19 19 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
101 5 1 9 18 9 19 9 20 9 19 ... 14 7 18 3 17 3 14 9 3 9
102 11 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
103 5 19 20 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
104 5 19 9 18 9 1 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
105 5 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
106 11 19 9 18 20 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
107 11 19 9 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
108 5 19 19 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 18 9 0 0
109 5 19 19 18 12 19 9 20 20 19 ... 14 7 18 3 17 3 18 9 5 0
110 11 19 9 18 20 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
111 11 19 9 18 9 19 9 9 20 19 ... 14 7 18 3 17 3 14 9 3 9
112 11 19 9 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
113 11 19 9 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
114 5 19 9 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
115 11 19 9 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
116 11 19 9 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
117 5 19 9 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
118 5 1 9 18 13 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
119 5 19 9 18 9 19 9 20 20 1 ... 14 7 18 3 17 3 14 9 3 9
120 5 19 9 18 13 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
121 5 19 9 18 13 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
122 5 19 9 18 9 19 9 20 20 19 ... 14 7 18 3 21 3 14 9 3 9
123 7 19 19 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
124 5 19 17 18 9 19 9 20 20 19 ... 14 7 18 3 21 3 14 9 3 9
125 7 19 19 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
126 7 19 19 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9
127 5 19 9 18 9 19 9 20 20 19 ... 14 7 18 3 17 3 14 9 3 9

128 rows × 245 columns


In [8]:
from sklearn.ensemble import RandomForestClassifier

# Fit many independent random forests and record, for every alignment
# position, the feature importance obtained in each run.  The per-position
# lists are averaged in a later cell.
n_runs = 2000
feature_importance_dict = {pos: [] for pos in encoded_df.columns}

for i in range(n_runs):
    # Seeding each forest with its run number keeps the runs different from
    # one another while making the whole experiment repeatable.
    rf = RandomForestClassifier(random_state=i)
    # Only the fitted model is needed; the transformed matrix that
    # fit_transform used to return was never used.
    rf.fit(encoded_df, labels)
    for pos, importance in zip(encoded_df.columns, rf.feature_importances_):
        feature_importance_dict[pos].append(importance)

In [9]:
# Get the mean for each feature_importance, keyed by alignment position.
mean_importances = {pos: np.mean(importances)
                    for pos, importances in feature_importance_dict.items()}

# NOTE(review): this colour map is not used by the plot below; it is kept
# because later cells may rely on the name -- confirm before removing.
color = brewer2mpl.get_map('Blues', 'Sequential', 3)

# Materialise matching x/y lists; dictionary views cannot be passed to
# matplotlib directly on Python 3.
positions = list(mean_importances.keys())
scores = [mean_importances[pos] for pos in positions]

fig, ax = plt.subplots()
ax.scatter(positions, scores, color='r')
ax.set_title("Positional Importance for %s Gene" % gene)
ax.set_xlabel('Position in Amino Acid Alignment')
ax.set_ylabel('Feature Importance')
fig.savefig("%s Positional Feature Importances.pdf" % gene)



In [10]:
# What positions are the best predictors?
# Sort ascending by mean importance, then flip so the strongest come first.
ascending = sorted(mean_importances.items(), key=lambda item: item[1])
pos_importance = list(reversed(ascending))
pos_by_importance = [pos for pos, score in pos_importance]

# Tabulate the ranking, save it next to the notebook and display it.
importances_df = pd.DataFrame(pos_importance)
importances_df.columns = ['Position', 'Relative Importance']
importances_df.to_csv('%s Relative Importances.csv' % gene)
importances_df


Out[10]:
Position Relative Importance
0 539 1.289127e-01
1 199 7.401858e-02
2 234 7.232300e-02
3 305 5.968016e-02
4 221 5.506354e-02
5 269 4.208691e-02
6 167 2.677374e-02
7 233 2.134535e-02
8 380 2.115871e-02
9 554 2.026940e-02
10 243 1.836355e-02
11 203 1.781382e-02
12 197 1.733412e-02
13 165 1.333188e-02
14 336 1.259591e-02
15 78 1.112489e-02
16 468 1.026789e-02
17 382 1.010840e-02
18 3 9.955900e-03
19 312 9.787269e-03
20 232 9.506647e-03
21 65 8.954474e-03
22 206 8.698531e-03
23 193 8.692364e-03
24 89 8.642432e-03
25 172 8.513624e-03
26 71 8.427695e-03
27 540 8.310529e-03
28 238 8.053840e-03
29 504 7.946796e-03
... ... ...
215 228 5.558884e-06
216 265 5.347743e-06
217 381 5.082205e-06
218 398 4.919469e-06
219 311 4.630998e-06
220 550 4.358674e-06
221 513 3.977709e-06
222 247 3.931634e-06
223 458 3.902784e-06
224 129 3.439076e-06
225 537 3.186192e-06
226 153 3.130977e-06
227 338 2.232976e-06
228 516 2.144016e-06
229 299 1.965777e-06
230 19 1.950160e-06
231 146 1.949114e-06
232 480 9.645732e-07
233 362 6.436590e-07
234 437 3.781727e-07
235 290 2.265815e-07
236 59 7.049769e-08
237 466 5.124909e-08
238 58 4.503898e-08
239 548 4.238068e-08
240 486 2.850790e-08
241 487 0.000000e+00
242 481 0.000000e+00
243 478 0.000000e+00
244 529 0.000000e+00

245 rows × 2 columns


In [11]:
# Pair the residue at alignment position 305 with each sequence's host label.
# list() forces evaluation so the pairs are also displayed on Python 3, where
# zip() returns a lazy iterator.
list(zip(df[305].values, labels))


Out[11]:
[('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('L', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('V', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('V', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'domestic'),
 ('I', 'wild'),
 ('I', 'wild'),
 ('I', 'wild'),
 ('I', 'wild'),
 ('V', 'wild'),
 ('V', 'wild'),
 ('V', 'wild'),
 ('V', 'wild'),
 ('V', 'wild'),
 ('V', 'wild')]

In [12]:
# Create pie charts of proportions for each position
from collections import Counter

# For every retained position, tally how often each (residue, host label)
# pair occurs across the sequences.
counts = {}
for pos in df.columns:
    residue_host_pairs = zip(df[pos].values, labels)
    counts[pos] = Counter(residue_host_pairs)
counts


Out[12]:
{1: Counter({('E', 'domestic'): 103, ('K', 'domestic'): 9, ('E', 'wild'): 7, ('G', 'domestic'): 5, ('G', 'wild'): 3, ('R', 'domestic'): 1}),
 2: Counter({('T', 'domestic'): 77, ('I', 'domestic'): 32, ('T', 'wild'): 9, ('A', 'domestic'): 7, ('A', 'wild'): 1, ('P', 'domestic'): 1, ('Q', 'domestic'): 1}),
 3: Counter({('I', 'domestic'): 103, ('V', 'domestic'): 11, ('I', 'wild'): 6, ('T', 'domestic'): 4, ('T', 'wild'): 3, ('R', 'wild'): 1}),
 4: Counter({('S', 'domestic'): 91, ('P', 'domestic'): 27, ('S', 'wild'): 10}),
 6: Counter({('M', 'domestic'): 68, ('I', 'domestic'): 42, ('I', 'wild'): 7, ('T', 'domestic'): 5, ('M', 'wild'): 3, ('V', 'domestic'): 2, ('L', 'domestic'): 1}),
 7: Counter({('T', 'domestic'): 108, ('T', 'wild'): 10, ('A', 'domestic'): 8, ('I', 'domestic'): 2}),
 8: Counter({('I', 'domestic'): 108, ('V', 'domestic'): 10, ('I', 'wild'): 10}),
 11: Counter({('V', 'domestic'): 79, ('L', 'domestic'): 28, ('I', 'domestic'): 11, ('V', 'wild'): 10}),
 12: Counter({('V', 'domestic'): 102, ('V', 'wild'): 10, ('S', 'domestic'): 10, ('I', 'domestic'): 4, ('A', 'domestic'): 2}),
 14: Counter({('T', 'domestic'): 80, ('A', 'domestic'): 36, ('T', 'wild'): 9, ('S', 'domestic'): 1, ('A', 'wild'): 1, ('V', 'domestic'): 1}),
 15: Counter({('S', 'domestic'): 105, ('N', 'domestic'): 10, ('S', 'wild'): 10, ('I', 'domestic'): 2, ('G', 'domestic'): 1}),
 16: Counter({('N', 'domestic'): 117, ('N', 'wild'): 10, ('Y', 'domestic'): 1}),
 19: Counter({('K', 'domestic'): 117, ('K', 'wild'): 10, ('N', 'domestic'): 1}),
 24: Counter({('H', 'domestic'): 100, ('Y', 'domestic'): 18, ('H', 'wild'): 9, ('Y', 'wild'): 1}),
 32: Counter({('T', 'domestic'): 116, ('T', 'wild'): 10, ('A', 'domestic'): 2}),
 34: Counter({('D', 'domestic'): 117, ('D', 'wild'): 10, ('N', 'domestic'): 1}),
 37: Counter({('T', 'domestic'): 115, ('T', 'wild'): 10, ('A', 'domestic'): 3}),
 39: Counter({('T', 'domestic'): 115, ('T', 'wild'): 10, ('N', 'domestic'): 2, ('A', 'domestic'): 1}),
 41: Counter({('V', 'domestic'): 109, ('V', 'wild'): 10, ('I', 'domestic'): 9}),
 45: Counter({('H', 'domestic'): 111, ('H', 'wild'): 9, ('Q', 'domestic'): 6, ('D', 'domestic'): 1, ('Q', 'wild'): 1}),
 47: Counter({('K', 'domestic'): 117, ('K', 'wild'): 10, ('R', 'domestic'): 1}),
 50: Counter({('L', 'domestic'): 117, ('L', 'wild'): 10, ('I', 'domestic'): 1}),
 51: Counter({('H', 'domestic'): 115, ('H', 'wild'): 10, ('P', 'domestic'): 3}),
 52: Counter({('T', 'domestic'): 117, ('T', 'wild'): 10, ('A', 'domestic'): 1}),
 53: Counter({('E', 'domestic'): 117, ('E', 'wild'): 10, ('V', 'domestic'): 1}),
 56: Counter({('G', 'domestic'): 115, ('G', 'wild'): 10, ('E', 'domestic'): 3}),
 57: Counter({('M', 'domestic'): 116, ('M', 'wild'): 10, ('K', 'domestic'): 2}),
 58: Counter({('L', 'domestic'): 117, ('L', 'wild'): 10, ('R', 'domestic'): 1}),
 59: Counter({('C', 'domestic'): 117, ('C', 'wild'): 10, ('G', 'domestic'): 1}),
 62: Counter({('N', 'domestic'): 117, ('N', 'wild'): 10, ('T', 'domestic'): 1}),
 65: Counter({('H', 'domestic'): 62, ('N', 'domestic'): 36, ('R', 'domestic'): 18, ('H', 'wild'): 9, ('L', 'domestic'): 1, ('R', 'wild'): 1, ('Q', 'domestic'): 1}),
 67: Counter({('L', 'domestic'): 117, ('L', 'wild'): 10, ('F', 'domestic'): 1}),
 70: Counter({('D', 'domestic'): 111, ('D', 'wild'): 9, ('N', 'domestic'): 7, ('N', 'wild'): 1}),
 71: Counter({('T', 'domestic'): 118, ('T', 'wild'): 9, ('K', 'wild'): 1}),
 74: Counter({('I', 'domestic'): 117, ('I', 'wild'): 10, ('V', 'domestic'): 1}),
 78: Counter({('I', 'domestic'): 106, ('V', 'domestic'): 12, ('I', 'wild'): 7, ('V', 'wild'): 3}),
 86: Counter({('L', 'domestic'): 67, ('M', 'domestic'): 37, ('L', 'wild'): 9, ('P', 'domestic'): 9, ('Q', 'domestic'): 3, ('M', 'wild'): 1, ('R', 'domestic'): 1, ('T', 'domestic'): 1}),
 89: Counter({('G', 'domestic'): 109, ('E', 'domestic'): 9, ('G', 'wild'): 9, ('E', 'wild'): 1}),
 90: Counter({('G', 'domestic'): 117, ('G', 'wild'): 10, ('K', 'domestic'): 1}),
 91: Counter({('R', 'domestic'): 116, ('R', 'wild'): 10, ('K', 'domestic'): 2}),
 92: Counter({('E', 'domestic'): 117, ('E', 'wild'): 10, ('D', 'domestic'): 1}),
 100: Counter({('P', 'domestic'): 86, ('S', 'domestic'): 32, ('P', 'wild'): 9, ('S', 'wild'): 1}),
 101: Counter({('S', 'domestic'): 110, ('S', 'wild'): 10, ('T', 'domestic'): 8}),
 103: Counter({('V', 'domestic'): 115, ('V', 'wild'): 10, ('A', 'domestic'): 3}),
 109: Counter({('P', 'domestic'): 117, ('P', 'wild'): 10, ('L', 'domestic'): 1}),
 112: Counter({('V', 'domestic'): 106, ('I', 'domestic'): 12, ('V', 'wild'): 10}),
 115: Counter({('L', 'domestic'): 116, ('L', 'wild'): 10, ('V', 'domestic'): 1, ('I', 'domestic'): 1}),
 117: Counter({('E', 'domestic'): 117, ('E', 'wild'): 10, ('K', 'domestic'): 1}),
 118: Counter({('L', 'domestic'): 117, ('L', 'wild'): 10, ('V', 'domestic'): 1}),
 120: Counter({('T', 'domestic'): 89, ('I', 'domestic'): 27, ('T', 'wild'): 10, ('V', 'domestic'): 1, ('A', 'domestic'): 1}),
 125: Counter({('S', 'domestic'): 86, ('A', 'domestic'): 32, ('S', 'wild'): 9, ('A', 'wild'): 1}),
 126: Counter({('S', 'domestic'): 109, ('S', 'wild'): 10, ('R', 'domestic'): 6, ('N', 'domestic'): 3}),
 129: Counter({('Q', 'domestic'): 117, ('Q', 'wild'): 10, ('R', 'domestic'): 1}),
 131: Counter({('I', 'domestic'): 116, ('I', 'wild'): 10, ('V', 'domestic'): 2}),
 132: Counter({('Q', 'domestic'): 117, ('Q', 'wild'): 10, ('L', 'domestic'): 1}),
 133: Counter({('I', 'domestic'): 83, ('M', 'domestic'): 28, ('I', 'wild'): 9, ('L', 'domestic'): 7, ('L', 'wild'): 1}),
 137: Counter({('T', 'domestic'): 114, ('T', 'wild'): 9, ('S', 'domestic'): 4, ('S', 'wild'): 1}),
 143: Counter({('Y', 'domestic'): 116, ('Y', 'wild'): 10, ('H', 'domestic'): 2}),
 144: Counter({('T', 'domestic'): 91, ('S', 'domestic'): 25, ('T', 'wild'): 9, ('N', 'domestic'): 1, ('N', 'wild'): 1, ('A', 'domestic'): 1}),
 146: Counter({('T', 'domestic'): 117, ('T', 'wild'): 10, ('K', 'domestic'): 1}),
 148: Counter({('K', 'domestic'): 117, ('K', 'wild'): 9, ('R', 'wild'): 1, ('S', 'domestic'): 1}),
 149: Counter({('S', 'domestic'): 85, ('A', 'domestic'): 33, ('S', 'wild'): 9, ('A', 'wild'): 1}),
 152: Counter({('D', 'domestic'): 95, ('G', 'domestic'): 10, ('N', 'domestic'): 9, ('D', 'wild'): 8, ('E', 'domestic'): 4, ('G', 'wild'): 2}),
 153: Counter({('S', 'domestic'): 117, ('S', 'wild'): 10, ('T', 'domestic'): 1}),
 157: Counter({('N', 'domestic'): 87, ('S', 'domestic'): 31, ('N', 'wild'): 9, ('S', 'wild'): 1}),
 162: Counter({('T', 'domestic'): 116, ('T', 'wild'): 10, ('S', 'domestic'): 1, ('I', 'domestic'): 1}),
 164: Counter({('K', 'domestic'): 116, ('K', 'wild'): 9, ('D', 'domestic'): 1, ('R', 'domestic'): 1, ('E', 'wild'): 1}),
 165: Counter({('N', 'domestic'): 83, ('S', 'domestic'): 34, ('S', 'wild'): 6, ('N', 'wild'): 3, ('R', 'wild'): 1, ('R', 'domestic'): 1}),
 167: Counter({('L', 'domestic'): 35, ('S', 'domestic'): 30, ('N', 'domestic'): 25, ('V', 'domestic'): 18, ('A', 'domestic'): 6, ('F', 'wild'): 5, ('V', 'wild'): 3, ('F', 'domestic'): 2, ('L', 'wild'): 1, ('S', 'wild'): 1, ('G', 'domestic'): 1, ('Q', 'domestic'): 1}),
 169: Counter({('P', 'domestic'): 117, ('P', 'wild'): 10, ('S', 'domestic'): 1}),
 170: Counter({('V', 'domestic'): 108, ('V', 'wild'): 10, ('I', 'domestic'): 9, ('A', 'domestic'): 1}),
 172: Counter({('D', 'domestic'): 118, ('D', 'wild'): 9, ('E', 'wild'): 1}),
 175: Counter({('Y', 'domestic'): 115, ('Y', 'wild'): 10, ('F', 'domestic'): 3}),
 178: Counter({('N', 'domestic'): 92, ('T', 'domestic'): 25, ('N', 'wild'): 10, ('D', 'domestic'): 1}),
 179: Counter({('R', 'domestic'): 110, ('R', 'wild'): 10, ('Q', 'domestic'): 6, ('G', 'domestic'): 1, ('M', 'domestic'): 1}),
 180: Counter({('G', 'domestic'): 100, ('E', 'domestic'): 18, ('G', 'wild'): 9, ('E', 'wild'): 1}),
 181: Counter({('K', 'domestic'): 116, ('K', 'wild'): 10, ('E', 'domestic'): 1, ('R', 'domestic'): 1}),
 182: Counter({('D', 'domestic'): 82, ('S', 'domestic'): 30, ('D', 'wild'): 9, ('N', 'domestic'): 6, ('S', 'wild'): 1}),
 183: Counter({('I', 'domestic'): 116, ('I', 'wild'): 10, ('V', 'domestic'): 2}),
 186: Counter({('V', 'domestic'): 111, ('V', 'wild'): 10, ('L', 'domestic'): 5, ('M', 'domestic'): 2}),
 193: Counter({('P', 'domestic'): 118, ('P', 'wild'): 9, ('A', 'wild'): 1}),
 194: Counter({('T', 'domestic'): 117, ('T', 'wild'): 10, ('S', 'domestic'): 1}),
 195: Counter({('D', 'domestic'): 112, ('D', 'wild'): 9, ('E', 'domestic'): 6, ('E', 'wild'): 1}),
 196: Counter({('T', 'domestic'): 116, ('T', 'wild'): 9, ('S', 'wild'): 1, ('S', 'domestic'): 1, ('I', 'domestic'): 1}),
 197: Counter({('A', 'domestic'): 91, ('T', 'domestic'): 10, ('E', 'domestic'): 8, ('V', 'domestic'): 7, ('A', 'wild'): 6, ('I', 'wild'): 1, ('S', 'domestic'): 1, ('I', 'domestic'): 1, ('E', 'wild'): 1, ('V', 'wild'): 1, ('T', 'wild'): 1}),
 198: Counter({('Q', 'domestic'): 117, ('Q', 'wild'): 10, ('X', 'domestic'): 1}),
 199: Counter({('T', 'domestic'): 117, ('R', 'wild'): 5, ('T', 'wild'): 4, ('R', 'domestic'): 1, ('M', 'wild'): 1}),
 200: Counter({('N', 'domestic'): 115, ('N', 'wild'): 9, ('S', 'domestic'): 1, ('D', 'domestic'): 1, ('D', 'wild'): 1, ('Q', 'domestic'): 1}),
 203: Counter({('T', 'domestic'): 93, ('I', 'domestic'): 24, ('I', 'wild'): 7, ('T', 'wild'): 3, ('X', 'domestic'): 1}),
 204: Counter({('R', 'domestic'): 102, ('N', 'domestic'): 16, ('R', 'wild'): 9, ('N', 'wild'): 1}),
 205: Counter({('T', 'domestic'): 112, ('T', 'wild'): 8, ('N', 'domestic'): 3, ('I', 'domestic'): 3, ('N', 'wild'): 2}),
 206: Counter({('D', 'domestic'): 118, ('D', 'wild'): 9, ('N', 'wild'): 1}),
 207: Counter({('T', 'domestic'): 117, ('T', 'wild'): 10, ('R', 'domestic'): 1}),
 208: Counter({('T', 'domestic'): 114, ('T', 'wild'): 10, ('I', 'domestic'): 2, ('X', 'domestic'): 1, ('A', 'domestic'): 1}),
 210: Counter({('S', 'domestic'): 115, ('S', 'wild'): 10, ('G', 'domestic'): 3}),
 211: Counter({('V', 'domestic'): 117, ('V', 'wild'): 10, ('I', 'domestic'): 1}),
 212: Counter({('T', 'domestic'): 105, ('T', 'wild'): 10, ('A', 'domestic'): 10, ('M', 'domestic'): 3}),
 215: Counter({('N', 'domestic'): 83, ('D', 'domestic'): 33, ('N', 'wild'): 9, ('S', 'domestic'): 2, ('D', 'wild'): 1}),
 217: Counter({('D', 'domestic'): 85, ('N', 'domestic'): 32, ('D', 'wild'): 9, ('N', 'wild'): 1, ('Y', 'domestic'): 1}),
 218: Counter({('R', 'domestic'): 117, ('R', 'wild'): 10, ('M', 'domestic'): 1}),
 219: Counter({('T', 'domestic'): 92, ('I', 'domestic'): 14, ('V', 'domestic'): 12, ('T', 'wild'): 9, ('I', 'wild'): 1}),
 220: Counter({('F', 'domestic'): 116, ('F', 'wild'): 10, ('Y', 'domestic'): 2}),
 221: Counter({('K', 'domestic'): 117, ('K', 'wild'): 6, ('N', 'wild'): 4, ('I', 'domestic'): 1}),
 223: Counter({('L', 'domestic'): 85, ('V', 'domestic'): 22, ('M', 'domestic'): 11, ('L', 'wild'): 9, ('V', 'wild'): 1}),
 224: Counter({('I', 'domestic'): 115, ('I', 'wild'): 10, ('V', 'domestic'): 3}),
 228: Counter({('P', 'domestic'): 117, ('P', 'wild'): 10, ('S', 'domestic'): 1}),
 230: Counter({('V', 'domestic'): 105, ('V', 'wild'): 10, ('A', 'domestic'): 10, ('L', 'domestic'): 3}),
 232: Counter({('G', 'domestic'): 118, ('G', 'wild'): 9, ('D', 'wild'): 1}),
 233: Counter({('L', 'domestic'): 106, ('Q', 'domestic'): 12, ('Q', 'wild'): 6, ('L', 'wild'): 4}),
 234: Counter({('I', 'domestic'): 83, ('Q', 'domestic'): 29, ('I', 'wild'): 4, ('T', 'wild'): 4, ('M', 'domestic'): 3, ('F', 'domestic'): 2, ('L', 'domestic'): 1, ('F', 'wild'): 1, ('Q', 'wild'): 1}),
 238: Counter({('N', 'domestic'): 117, ('N', 'wild'): 9, ('K', 'domestic'): 1, ('D', 'wild'): 1}),
 243: Counter({('V', 'domestic'): 104, ('I', 'domestic'): 13, ('I', 'wild'): 6, ('V', 'wild'): 4, ('X', 'domestic'): 1}),
 247: Counter({('G', 'domestic'): 117, ('G', 'wild'): 10, ('S', 'domestic'): 1}),
 262: Counter({('W', 'domestic'): 117, ('W', 'wild'): 10, ('R', 'domestic'): 1}),
 263: Counter({('F', 'domestic'): 63, ('Y', 'domestic'): 55, ('Y', 'wild'): 7, ('F', 'wild'): 3}),
 265: Counter({('H', 'domestic'): 117, ('H', 'wild'): 10, ('P', 'domestic'): 1}),
 266: Counter({('V', 'domestic'): 99, ('I', 'domestic'): 18, ('V', 'wild'): 10, ('A', 'domestic'): 1}),
 267: Counter({('L', 'domestic'): 117, ('L', 'wild'): 10, ('F', 'domestic'): 1}),
 269: Counter({('G', 'domestic'): 118, ('G', 'wild'): 9, ('E', 'wild'): 1}),
 270: Counter({('E', 'domestic'): 81, ('G', 'domestic'): 34, ('E', 'wild'): 9, ('V', 'domestic'): 3, ('G', 'wild'): 1}),
 276: Counter({('L', 'domestic'): 116, ('L', 'wild'): 10, ('M', 'domestic'): 1, ('X', 'domestic'): 1}),
 277: Counter({('K', 'domestic'): 111, ('K', 'wild'): 9, ('R', 'domestic'): 7, ('R', 'wild'): 1}),
 278: Counter({('T', 'domestic'): 117, ('T', 'wild'): 10, ('S', 'domestic'): 1}),
 279: Counter({('D', 'domestic'): 115, ('D', 'wild'): 10, ('G', 'domestic'): 2, ('N', 'domestic'): 1}),
 281: Counter({('N', 'domestic'): 91, ('K', 'domestic'): 18, ('N', 'wild'): 9, ('S', 'domestic'): 7, ('R', 'domestic'): 2, ('S', 'wild'): 1}),
 282: Counter({('S', 'domestic'): 108, ('S', 'wild'): 9, ('N', 'domestic'): 8, ('N', 'wild'): 1, ('I', 'domestic'): 1, ('G', 'domestic'): 1}),
 283: Counter({('G', 'domestic'): 117, ('G', 'wild'): 10, ('S', 'domestic'): 1}),
 284: Counter({('N', 'domestic'): 116, ('N', 'wild'): 10, ('D', 'domestic'): 1, ('S', 'domestic'): 1}),
 286: Counter({('V', 'domestic'): 115, ('V', 'wild'): 10, ('I', 'domestic'): 2, ('A', 'domestic'): 1}),
 290: Counter({('Q', 'domestic'): 117, ('Q', 'wild'): 10, ('H', 'domestic'): 1}),
 291: Counter({('T', 'domestic'): 115, ('T', 'wild'): 10, ('P', 'domestic'): 3}),
 297: Counter({('N', 'domestic'): 117, ('N', 'wild'): 10, ('S', 'domestic'): 1}),
 299: Counter({('T', 'domestic'): 115, ('T', 'wild'): 10, ('S', 'domestic'): 3}),
 300: Counter({('L', 'domestic'): 116, ('L', 'wild'): 10, ('F', 'domestic'): 1, ('M', 'domestic'): 1}),
 301: Counter({('P', 'domestic'): 117, ('P', 'wild'): 10, ('A', 'domestic'): 1}),
 303: Counter({('H', 'domestic'): 117, ('H', 'wild'): 10, ('X', 'domestic'): 1}),
 305: Counter({('I', 'domestic'): 115, ('V', 'wild'): 6, ('I', 'wild'): 4, ('V', 'domestic'): 2, ('L', 'domestic'): 1}),
 307: Counter({('K', 'domestic'): 117, ('K', 'wild'): 10, ('R', 'domestic'): 1}),
 308: Counter({('Y', 'domestic'): 111, ('Y', 'wild'): 10, ('F', 'domestic'): 7}),
 311: Counter({('G', 'domestic'): 117, ('G', 'wild'): 10, ('W', 'domestic'): 1}),
 312: Counter({('T', 'domestic'): 61, ('D', 'domestic'): 24, ('N', 'domestic'): 23, ('I', 'domestic'): 10, ('T', 'wild'): 9, ('I', 'wild'): 1}),
 316: Counter({('Y', 'domestic'): 117, ('Y', 'wild'): 10, ('W', 'domestic'): 1}),
 317: Counter({('I', 'domestic'): 95, ('V', 'domestic'): 23, ('I', 'wild'): 9, ('V', 'wild'): 1}),
 318: Counter({('G', 'domestic'): 86, ('R', 'domestic'): 32, ('G', 'wild'): 9, ('R', 'wild'): 1}),
 320: Counter({('K', 'domestic'): 116, ('K', 'wild'): 10, ('N', 'domestic'): 2}),
 326: Counter({('I', 'domestic'): 90, ('V', 'domestic'): 28, ('I', 'wild'): 9, ('V', 'wild'): 1}),
 332: Counter({('P', 'domestic'): 111, ('P', 'wild'): 9, ('H', 'domestic'): 7, ('H', 'wild'): 1}),
 333: Counter({('A', 'domestic'): 117, ('A', 'wild'): 10, ('S', 'domestic'): 1}),
 334: Counter({('R', 'domestic'): 107, ('K', 'domestic'): 11, ('R', 'wild'): 10}),
 336: Counter({('S', 'domestic'): 112, ('S', 'wild'): 9, ('N', 'domestic'): 2, ('K', 'domestic'): 2, ('K', 'wild'): 1, ('R', 'domestic'): 1, ('G', 'domestic'): 1}),
 338: Counter({('G', 'domestic'): 117, ('G', 'wild'): 10, ('A', 'domestic'): 1}),
 350: Counter({('G', 'domestic'): 118, ('G', 'wild'): 9, ('N', 'wild'): 1}),
 351: Counter({('W', 'domestic'): 118, ('W', 'wild'): 9, ('R', 'wild'): 1}),
 356: Counter({('A', 'domestic'): 110, ('A', 'wild'): 10, ('S', 'domestic'): 8}),
 362: Counter({('Q', 'domestic'): 117, ('Q', 'wild'): 10, ('L', 'domestic'): 1}),
 364: Counter({('S', 'domestic'): 116, ('S', 'wild'): 10, ('T', 'domestic'): 1, ('L', 'domestic'): 1}),
 366: Counter({('D', 'domestic'): 117, ('D', 'wild'): 9, ('E', 'domestic'): 1, ('E', 'wild'): 1}),
 369: Counter({('V', 'domestic'): 117, ('V', 'wild'): 10, ('I', 'domestic'): 1}),
 371: Counter({('M', 'domestic'): 110, ('M', 'wild'): 10, ('I', 'domestic'): 6, ('V', 'domestic'): 2}),
 376: Counter({('D', 'domestic'): 111, ('D', 'wild'): 9, ('V', 'domestic'): 5, ('E', 'domestic'): 1, ('G', 'domestic'): 1, ('V', 'wild'): 1}),
 380: Counter({('K', 'domestic'): 111, ('K', 'wild'): 10, ('R', 'domestic'): 6, ('E', 'domestic'): 1}),
 381: Counter({('A', 'domestic'): 117, ('A', 'wild'): 10, ('S', 'domestic'): 1}),
 382: Counter({('V', 'domestic'): 66, ('I', 'domestic'): 49, ('V', 'wild'): 9, ('F', 'domestic'): 3, ('I', 'wild'): 1}),
 384: Counter({('K', 'domestic'): 116, ('K', 'wild'): 10, ('R', 'domestic'): 2}),
 387: Counter({('S', 'domestic'): 104, ('S', 'wild'): 10, ('A', 'domestic'): 8, ('T', 'domestic'): 6}),
 391: Counter({('N', 'domestic'): 99, ('S', 'domestic'): 11, ('N', 'wild'): 10, ('T', 'domestic'): 8}),
 393: Counter({('V', 'domestic'): 114, ('V', 'wild'): 10, ('I', 'domestic'): 4}),
 394: Counter({('D', 'domestic'): 116, ('D', 'wild'): 10, ('G', 'domestic'): 1, ('E', 'domestic'): 1}),
 398: Counter({('K', 'domestic'): 117, ('K', 'wild'): 10, ('R', 'domestic'): 1}),
 402: Counter({('I', 'domestic'): 115, ('I', 'wild'): 10, ('V', 'domestic'): 3}),
 403: Counter({('I', 'domestic'): 116, ('I', 'wild'): 10, ('V', 'domestic'): 1, ('T', 'domestic'): 1}),
 408: Counter({('S', 'domestic'): 114, ('S', 'wild'): 10, ('N', 'domestic'): 4}),
 410: Counter({('V', 'domestic'): 111, ('V', 'wild'): 10, ('I', 'domestic'): 6, ('L', 'domestic'): 1}),
 412: Counter({('T', 'domestic'): 116, ('T', 'wild'): 10, ('A', 'domestic'): 2}),
 419: Counter({('N', 'domestic'): 117, ('N', 'wild'): 10, ('D', 'domestic'): 1}),
 421: Counter({('I', 'domestic'): 117, ('I', 'wild'): 10, ('V', 'domestic'): 1}),
 427: Counter({('D', 'domestic'): 101, ('N', 'domestic'): 17, ('D', 'wild'): 10}),
 428: Counter({('V', 'domestic'): 110, ('V', 'wild'): 9, ('I', 'domestic'): 8, ('I', 'wild'): 1}),
 437: Counter({('V', 'domestic'): 117, ('V', 'wild'): 10, ('I', 'domestic'): 1}),
 442: Counter({('Q', 'domestic'): 117, ('Q', 'wild'): 10, ('R', 'domestic'): 1}),
 447: Counter({('E', 'domestic'): 117, ('E', 'wild'): 10, ('D', 'domestic'): 1}),
 448: Counter({('H', 'domestic'): 117, ('H', 'wild'): 10, ('C', 'domestic'): 1}),
 449: Counter({('D', 'domestic'): 117, ('D', 'wild'): 10, ('G', 'domestic'): 1}),
 458: Counter({('K', 'domestic'): 116, ('K', 'wild'): 10, ('R', 'domestic'): 1, ('Q', 'domestic'): 1}),
 460: Counter({('K', 'domestic'): 115, ('K', 'wild'): 10, ('R', 'domestic'): 3}),
 462: Counter({('A', 'domestic'): 115, ('A', 'wild'): 10, ('T', 'domestic'): 3}),
 465: Counter({('S', 'domestic'): 114, ('S', 'wild'): 10, ('Y', 'domestic'): 3, ('F', 'domestic'): 1}),
 466: Counter({('N', 'domestic'): 117, ('N', 'wild'): 10, ('H', 'domestic'): 1}),
 468: Counter({('M', 'domestic'): 99, ('I', 'domestic'): 11, ('M', 'wild'): 8, ('V', 'domestic'): 7, ('I', 'wild'): 2, ('K', 'domestic'): 1}),
 470: Counter({('D', 'domestic'): 117, ('D', 'wild'): 10, ('N', 'domestic'): 1}),
 472: Counter({('K', 'domestic'): 117, ('K', 'wild'): 10, ('N', 'domestic'): 1}),
 476: Counter({('E', 'domestic'): 116, ('E', 'wild'): 10, ('D', 'domestic'): 1, ('Q', 'domestic'): 1}),
 478: Counter({('Y', 'domestic'): 117, ('Y', 'wild'): 10, ('S', 'domestic'): 1}),
 480: Counter({('K', 'domestic'): 117, ('K', 'wild'): 10, ('R', 'domestic'): 1}),
 481: Counter({('C', 'domestic'): 117, ('C', 'wild'): 10, ('W', 'domestic'): 1}),
 482: Counter({('D', 'domestic'): 115, ('D', 'wild'): 10, ('N', 'domestic'): 1, ('G', 'domestic'): 1, ('E', 'domestic'): 1}),
 483: Counter({('D', 'domestic'): 92, ('N', 'domestic'): 24, ('D', 'wild'): 8, ('N', 'wild'): 2, ('E', 'domestic'): 1, ('G', 'domestic'): 1}),
 484: Counter({('Q', 'domestic'): 114, ('Q', 'wild'): 10, ('K', 'domestic'): 2, ('L', 'domestic'): 1, ('H', 'domestic'): 1}),
 486: Counter({('M', 'domestic'): 117, ('M', 'wild'): 10, ('V', 'domestic'): 1}),
 487: Counter({('E', 'domestic'): 117, ('E', 'wild'): 10, ('G', 'domestic'): 1}),
 488: Counter({('T', 'domestic'): 117, ('T', 'wild'): 10, ('A', 'domestic'): 1}),
 490: Counter({('R', 'domestic'): 113, ('R', 'wild'): 9, ('K', 'domestic'): 3, ('Q', 'domestic'): 2, ('Q', 'wild'): 1}),
 491: Counter({('N', 'domestic'): 117, ('N', 'wild'): 10, ('D', 'domestic'): 1}),
 495: Counter({('N', 'domestic'): 113, ('N', 'wild'): 10, ('D', 'domestic'): 3, ('S', 'domestic'): 1, ('L', 'domestic'): 1}),
 496: Counter({('R', 'domestic'): 114, ('R', 'wild'): 10, ('K', 'domestic'): 4}),
 497: Counter({('R', 'domestic'): 115, ('R', 'wild'): 9, ('K', 'domestic'): 1, ('K', 'wild'): 1, ('G', 'domestic'): 1, ('Q', 'domestic'): 1}),
 500: Counter({('K', 'domestic'): 101, ('K', 'wild'): 9, ('T', 'domestic'): 7, ('R', 'domestic'): 5, ('Q', 'domestic'): 4, ('M', 'domestic'): 1, ('T', 'wild'): 1}),
 503: Counter({('S', 'domestic'): 105, ('S', 'wild'): 10, ('A', 'domestic'): 10, ('V', 'domestic'): 2, ('P', 'domestic'): 1}),
 504: Counter({('R', 'domestic'): 112, ('R', 'wild'): 9, ('K', 'domestic'): 6, ('G', 'wild'): 1}),
 506: Counter({('E', 'domestic'): 107, ('E', 'wild'): 10, ('G', 'domestic'): 7, ('K', 'domestic'): 4}),
 508: Counter({('Q', 'domestic'): 117, ('Q', 'wild'): 10, ('L', 'domestic'): 1}),
 509: Counter({('K', 'domestic'): 113, ('K', 'wild'): 10, ('R', 'domestic'): 5}),
 510: Counter({('I', 'domestic'): 115, ('I', 'wild'): 10, ('L', 'domestic'): 2, ('R', 'domestic'): 1}),
 511: Counter({('E', 'domestic'): 116, ('E', 'wild'): 10, ('D', 'domestic'): 2}),
 513: Counter({('V', 'domestic'): 117, ('V', 'wild'): 10, ('I', 'domestic'): 1}),
 516: Counter({('E', 'domestic'): 117, ('E', 'wild'): 10, ('K', 'domestic'): 1}),
 517: Counter({('S', 'domestic'): 112, ('S', 'wild'): 9, ('A', 'domestic'): 4, ('Y', 'domestic'): 2, ('A', 'wild'): 1}),
 519: Counter({('G', 'domestic'): 116, ('G', 'wild'): 10, ('E', 'domestic'): 2}),
 520: Counter({('T', 'domestic'): 109, ('T', 'wild'): 9, ('I', 'domestic'): 8, ('N', 'domestic'): 1, ('N', 'wild'): 1}),
 525: Counter({('T', 'domestic'): 107, ('S', 'domestic'): 10, ('T', 'wild'): 9, ('S', 'wild'): 1, ('A', 'domestic'): 1}),
 528: Counter({('S', 'domestic'): 116, ('S', 'wild'): 10, ('T', 'domestic'): 1, ('Y', 'domestic'): 1}),
 529: Counter({('T', 'domestic'): 117, ('T', 'wild'): 10, ('K', 'domestic'): 1}),
 530: Counter({('V', 'domestic'): 113, ('V', 'wild'): 9, ('A', 'domestic'): 4, ('G', 'domestic'): 1, ('A', 'wild'): 1}),
 531: Counter({('A', 'domestic'): 118, ('A', 'wild'): 9, ('P', 'wild'): 1}),
 534: Counter({('L', 'domestic'): 116, ('L', 'wild'): 10, ('G', 'domestic'): 2}),
 535: Counter({('V', 'domestic'): 116, ('V', 'wild'): 10, ('I', 'domestic'): 1, ('Y', 'domestic'): 1}),
 536: Counter({('L', 'domestic'): 115, ('L', 'wild'): 10, ('I', 'domestic'): 2, ('P', 'domestic'): 1}),
 537: Counter({('A', 'domestic'): 117, ('A', 'wild'): 10, ('Q', 'domestic'): 1}),
 538: Counter({('M', 'domestic'): 107, ('M', 'wild'): 10, ('I', 'domestic'): 6, ('T', 'domestic'): 2, ('W', 'domestic'): 1, ('K', 'domestic'): 1, ('V', 'domestic'): 1}),
 539: Counter({('G', 'domestic'): 117, ('G', 'wild'): 8, ('V', 'wild'): 2, ('R', 'domestic'): 1}),
 540: Counter({('F', 'domestic'): 112, ('F', 'wild'): 9, ('S', 'domestic'): 4, ('L', 'domestic'): 2, ('L', 'wild'): 1}),
 541: Counter({('A', 'domestic'): 117, ('A', 'wild'): 10, ('F', 'domestic'): 1}),
 544: Counter({('L', 'domestic'): 116, ('L', 'wild'): 10, ('M', 'domestic'): 2}),
 547: Counter({('A', 'domestic'): 117, ('A', 'wild'): 10, ('V', 'domestic'): 1}),
 548: Counter({('M', 'domestic'): 117, ('M', 'wild'): 10, ('S', 'domestic'): 1}),
 550: Counter({('N', 'domestic'): 116, ('N', 'wild'): 10, ('G', 'domestic'): 1, ('I', 'domestic'): 1}),
 551: Counter({('G', 'domestic'): 115, ('G', 'wild'): 10, ('V', 'domestic'): 1, ('L', 'domestic'): 1, ('E', 'domestic'): 1}),
 552: Counter({('S', 'domestic'): 116, ('S', 'wild'): 10, ('F', 'domestic'): 1, ('T', 'domestic'): 1}),
 553: Counter({('C', 'domestic'): 116, ('C', 'wild'): 10, ('G', 'domestic'): 1, ('S', 'domestic'): 1}),
 554: Counter({('R', 'domestic'): 117, ('R', 'wild'): 8, ('W', 'wild'): 2, ('K', 'domestic'): 1}),
 555: Counter({('C', 'domestic'): 116, ('C', 'wild'): 10, ('V', 'domestic'): 1, ('S', 'domestic'): 1}),
 556: Counter({('N', 'domestic'): 112, ('N', 'wild'): 10, ('S', 'domestic'): 3, ('I', 'domestic'): 1, ('D', 'domestic'): 1, ('T', 'domestic'): 1}),
 557: Counter({('I', 'domestic'): 116, ('I', 'wild'): 10, ('M', 'domestic'): 1, ('Y', 'domestic'): 1}),
 558: Counter({('C', 'domestic'): 115, ('C', 'wild'): 10, ('-', 'domestic'): 2, ('E', 'domestic'): 1}),
 559: Counter({('I', 'domestic'): 112, ('I', 'wild'): 10, ('-', 'domestic'): 3, ('V', 'domestic'): 2, ('L', 'domestic'): 1})}

In [13]:
def wild_domestic_proportions_piecharts(counter, pos, gene_name=None):
    """
    Draw side-by-side pie charts of the amino-acid counts observed at one
    position in wild and in domestic birds, save the figure as a PDF in the
    working directory, and display it.

    Parameters
    ----------
    counter : mapping
        Maps a position to a dict-like of (letter, state) -> count, where
        state is 'wild' or 'domestic' (e.g. a dict of collections.Counter).
    pos : hashable
        Key into `counter` selecting the position to plot.
    gene_name : str, optional
        Gene label used in the output file name.  When omitted, the
        notebook-level variable `gene` is used, as this function did before
        the parameter existed.

    Returns
    -------
    None
        The figure is written to
        '<gene> Position <pos> Proportion of Amino Acids.pdf' and shown.

    Raises
    ------
    KeyError
        Propagated from `counter[pos]` when that lookup fails.
    NameError
        If `gene_name` is omitted and no notebook-level `gene` is defined.
    """
    position_counts = counter[pos]

    # Fall back to the notebook-level `gene` only when no label was passed;
    # the conditional expression leaves the global untouched otherwise.
    gene_label = gene if gene_name is None else gene_name

    # Fix a single letter order up front so wedge order, colour assignment and
    # legend labels agree across both pies (a bare set has no guaranteed
    # iteration order).
    letters = sorted(set(letter for (letter, state) in position_counts.keys()))

    # A letter missing from one group gets an explicit zero, so both pies have
    # one wedge per letter in the same order.  Plain lists (rather than
    # dict.values()) are passed to pie() so this also works on Python 3.
    wild_counts = [position_counts.get((letter, 'wild'), 0) for letter in letters]
    domestic_counts = [position_counts.get((letter, 'domestic'), 0) for letter in letters]

    # ColorBrewer's Set1 palette is published for 3 to 9 classes; clamp the
    # request to that range.  With more than nine letters the nine colours are
    # reused cyclically by pie().
    n_colors = min(max(len(letters), 3), 9)
    colors = brewer2mpl.get_map('Set1', 'qualitative', n_colors).mpl_colors

    fig = plt.figure(figsize=(6.5, 3))

    # Wild bird counts per amino acid.
    ax_wild = fig.add_subplot(121)
    ax_wild.pie(wild_counts, colors=colors)
    ax_wild.set_title('Wild, Pos %s, n=%s' % (pos, sum(wild_counts)))

    # Domestic bird counts per amino acid.
    ax_domestic = fig.add_subplot(122)
    ax_domestic.pie(domestic_counts, colors=colors)
    ax_domestic.set_title('Domestic, Pos %s, n=%s' % (pos, sum(domestic_counts)))

    # The legend is attached to the right-hand (domestic) axes; its wedges are
    # in `letters` order, so labels line up with colours for both pies.
    ax_domestic.legend(labels=letters, bbox_to_anchor=[1.3, 1])
    fig.savefig('%s Position %s Proportion of Amino Acids.pdf' % (gene_label, pos))
    plt.show()
    # Release the figure so calling this in a loop does not accumulate figures.
    plt.close(fig)

In [14]:
# TODO: implement wild_domestic_proportions(counter, ranked_positions)

In [15]:
# Draw the wild/domestic pie charts for the first ten (pos, importance) pairs
# in pos_importance; the importance value itself is not needed for plotting.
for pos, importance in pos_importance[:10]:
    wild_domestic_proportions_piecharts(counter=counts, pos=pos)



In [15]:


In [15]:


In [15]: